!! Copyright (C) 2009,2010,2011,2012  Marco Restelli
!!
!! This file is part of:
!!   LDGH -- Local Hybridizable Discontinuous Galerkin toolkit
!!
!! LDGH is free software: you can redistribute it and/or modify it
!! under the terms of the GNU General Public License as published by
!! the Free Software Foundation, either version 3 of the License, or
!! (at your option) any later version.
!!
!! LDGH is distributed in the hope that it will be useful, but WITHOUT
!! ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
!! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
!! License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with LDGH. If not, see <http://www.gnu.org/licenses/>.
!!
!! author: Marco Restelli                   <marco.restelli@gmail.com>


!>\brief
!! Starting from the hybrid variable \f$\lambda\f$, reconstruct the
!! complete solution and the superconvergent variables.
!!
!! \n
!!
!! Currently implemented reconstructions:
!! <ul>
!!  <li> primal variable \f$u\f$
!!  <li> primal superconvergent variable \f$u^\star\f$
!!  <li> primal superconvergent variable \f$u^*\f$
!!  <li> dual variable \f$\underline{q}\f$
!!  <li> dual \f$H(div,\Omega)\f$ variable
!!   \f$\underline{q}^\star_{\mathbb{RT}}\f$.
!! </ul>
!!
!! \note The local matrices are recomputed on the fly. This choice is
!! done to avoid storing the local matrices for all the
!! reconstructions during the computations of \f$\lambda\f$.
!!
!! This module also defines the variable <tt>qhn</tt>, representing
!! \f$\hat{\underline{q}}\cdot\underline{n}|_{\partial K}\f$. To
!! represent this quantity, we rely on the fact that it is a
!! polynomial function belonging to the same space as the hybrid
!! variable \f$\lambda\f$. It is thus represented in terms of the
!! hybrid basis. More specifically, <tt>qhn</tt> is a rank three
!! array where the first index corresponds to the degree of freedom in
!! terms of the hybrid basis, the second index corresponds to the
!! local side index and the third index corresponds to the element.
!! Notice that, a priori, 
!! \f$\hat{\underline{q}}\cdot\underline{n}|_{\partial K}\f$ is not a
!! single valued function on the side, and due to the reciprocity
!! condition of the system we have that, on the two elements sharing
!! one side, the value is the same up to a sign change.
!<
module mod_ldgh_reconstructions

!-----------------------------------------------------------------------

 use mod_utils, only: &
   t_realtime, my_second

 use mod_messages, only: &
   mod_messages_initialized, &
   error,   &
   warning, &
   info

 use mod_kinds, only: &
   mod_kinds_initialized, &
   wp

 use mod_linal, only: &
   mod_linal_initialized, &
   invmat_nop, &
   invmat_chol,&
   linsys, &
   linsys_chol

 use mod_sympoly, only: &
   mod_sympoly_initialized, &
   t_sympol,     &
   operator(*),  &
   ev,           &
   me_int

 use mod_grid, only: &
   mod_grid_initialized, &
   t_grid, affmap

 use mod_base, only: &
   mod_base_initialized, &
   t_base

 use mod_bcs, only: &
   mod_bcs_initialized, &
   t_bcs

 use mod_fe_spaces, only: &
   mod_fe_spaces_initialized, &
   dg_scal, &
   rt_vect, &
   dg_vect, &
!   solen_vect, &
   rt_vect_canonical_dofs
!   adapt_dg_vect

 use mod_tau, only: &
   mod_tau_initialized, &
   taus

 use mod_ldgh_locmat, only: &
   mod_ldgh_locmat_initialized, &
   ldgh_locmat, t_bcs_error

 use mod_testcases, only: &
   mod_testcases_initialized, &
   coeff_diff,  &
   coeff_adv,   &
   coeff_xiadv, &
   coeff_re,    &
   coeff_f

 use mod_output_control, only: &
   mod_output_control_initialized, &
   elapsed_format

!-----------------------------------------------------------------------
 
 implicit none

!-----------------------------------------------------------------------

! Module interface

 public :: &
   mod_ldgh_reconstructions_constructor, &
   mod_ldgh_reconstructions_destructor,  &
   mod_ldgh_reconstructions_initialized, &
   uuu, qqq, qhn, &
   bases, uuus, uuusef, xiadv_el, &
!   base_qsdg, qqqsdg,   &
   base_qsrt, qqqsrt

 private

!-----------------------------------------------------------------------

! Module types and parameters

! Module variables

 ! public members
 real(wp), allocatable, protected :: uuu(:), qqq(:), qhn(:,:,:)
 type(t_base), save, protected :: bases, base_qsrt!, base_qsdg
 real(wp), allocatable, protected :: &
   uuus(:), uuusef(:), xiadv_el(:), qqqsrt(:)!, qqqsdg(:)
 logical, protected ::               &
   mod_ldgh_reconstructions_initialized = .false.
 
 ! private members
 logical :: compute_usef
 character(len=*), parameter :: &
   this_mod_name = 'mod_ldgh_reconstructions'

!-----------------------------------------------------------------------

contains

!-----------------------------------------------------------------------

 !> Constructor: takes care of all the reconstructions.
 !!
 !! In this subroutine the local matrices are recomputed in order to
 !! reconstruct the primal and dual variables from the hybrid one. One
 !! could avoid repeating these computations by storing the four
 !! matries \c uuk, \c vvk, \c qqk and \c wwk during the construction
 !! of the linear system, in which case \c uuu and \c qqq can be
 !! immediately obtained from \c lam. This would require approximately
 !! as much memory as the one used for the system matrix \c mmm.
 !<
 subroutine mod_ldgh_reconstructions_constructor(grid,bcs,base,lam, &
   gcompute_usef)
  type(t_grid), target, intent(in) :: grid
  type(t_bcs), target, intent(in) :: bcs
  type(t_base), intent(in) :: base
  real(wp), intent(in) :: lam(:)
  logical, intent(in) :: gcompute_usef

  integer :: d, mk, pk, nk, ie, isl, is, i, j
  real(t_realtime) :: t00, t0, dt
  real(wp) :: &
    mm(base%nk,base%nk), mmi(base%nk,base%nk), gg(base%nk,base%ms),    &
    pp(base%nk,base%ms), qhn_xig(base%ms),                             &
    aak(base%mk,base%mk), bbk(base%mk,base%pk),                        &
                                   cck(base%mk,(base%me%d+1)*base%nk), &
    ddk(base%pk,base%mk), eek(base%pk,base%pk),                        &
                     hhk(base%pk,(base%me%d+1)*base%nk), f2k(base%pk), &
    ffk((base%me%d+1)*base%nk,base%mk),                                &
    ggk((base%me%d+1)*base%nk,base%pk),                                &
    llk((base%me%d+1)*base%nk,(base%me%d+1)*base%nk),                  &
                                           f3k((base%me%d+1)*base%nk), &
   aaki(base%mk,base%mk), wwk(base%mk,base%pk),                        &
    qqk(base%mk,(base%me%d+1)*base%nk), ddkaaki(base%pk,base%mk),      &
    vvk(base%pk,base%pk), uuk(base%pk,(base%me%d+1)*base%nk)
  real(wp) :: lamk((grid%me%d+1)*base%nk), uuuk(base%pk), qqqk(base%mk)
  real(wp) :: qns(base%ms), ubs(base%ms), ebs(base%nk,base%ms), &
    uhbs(base%ms)
  type(t_bcs_error) :: b_err
  character(len=*), parameter :: &
    this_sub_name = 'constructor'

   !Consistency checks ---------------------------
   if( (mod_messages_initialized.eqv..false.) .or. &
       (mod_kinds_initialized.eqv..false.)    .or. &
       (mod_linal_initialized.eqv..false.)    .or. &
       (mod_sympoly_initialized.eqv..false.)  .or. &
       (mod_grid_initialized.eqv..false.)     .or. &
       (mod_base_initialized.eqv..false.)     .or. &
       (mod_bcs_initialized.eqv..false.)      .or. &
       (mod_fe_spaces_initialized.eqv..false.).or. &
       (mod_tau_initialized.eqv..false.)      .or. &
       (mod_ldgh_locmat_initialized.eqv..false.).or. &
       (mod_testcases_initialized.eqv..false.).or. &
       (mod_output_control_initialized.eqv..false.) ) then
     call error(this_sub_name,this_mod_name, &
                'Not all the required modules are initialized.')
   endif
   if(mod_ldgh_reconstructions_initialized.eqv..true.) then
     call warning(this_sub_name,this_mod_name, &
                  'Module is already initialized.')
   endif
   !----------------------------------------------

   !--------------------------------------------------------------------
   ! Switching from the nodal representation of qhn to its
   ! representation in terms of the hybrid basis amounts to a
   ! projection operator. We can precompute the matrix of the
   ! projector: it is given as
   !   P = M^{-1}G
   ! where (exploiting the accuracy of the boundary nodes)
   !  M is the mass matrix of the hybrid basis \eta_i
   !  G is the matrix G_{ij} = w_j \eta_i(x_j)
   do j=1,base%nk
     do i=1,j
       mm(i,j) = me_int( base%me%d-1 , base%e_s(i)*base%e_s(j) )
       mm(j,i) = mm(i,j)
     enddo
   enddo
   call invmat_chol( mm , mmi )
   do j=1,base%ms
     do i=1,base%nk
       gg(i,j) = base%wgs(j)*base%e(i,j)
     enddo
   enddo
   pp = matmul(mmi,gg)
   !--------------------------------------------------------------------

   !--------------------------------------------------------------------
   ! Primal and dual variables
   t00 = my_second()
   t0 = my_second()

   !$omp parallel private(d, mk  , pk  , nk  ,   &
   !$omp                  aak , bbk , cck ,      &
   !$omp                  ddk , eek , hhk , f2k, &
   !$omp                  ffk , ggk , llk , f3k, &
   !$omp                 aaki , wwk , qqk ,      &
   !$omp              ddkaaki , vvk , uuk ,      &
   !$omp                  lamk, uuuk, qqqk,      &
   !$omp                  qns , ubs , ebs , uhbs,&
   !$omp                  qhn_xig ,              &
   !$omp                  ie  ,b_err, isl , is ) &
   !$omp           shared(base,grid,taus,bcs,    &
   !$omp                  lam,uuu,qqq,qhn,pp)    &
   !$omp          default(none)

   d  = base%me%d
   mk = base%mk
   pk = base%pk
   nk = base%nk
   !$omp single
    allocate(uuu(grid%ne*pk),qqq(grid%ne*mk),qhn(base%nk,d+1,grid%ne))
   !$omp end single

   !$omp do schedule(static)
   elem_loop: do ie=1,grid%ne

     !------------------------------------------------------------------
     !1) compute local matrices
     call ldgh_locmat( aak , bbk , cck ,      &
                       ddk , eek , hhk , f2k, &
                       ffk , ggk , llk , f3k, &
                       base, grid%e(ie), taus(ie), &
                       bcs%b_e2be(ie), b_err )

     if(b_err%lerr) &
       call error(this_sub_name,this_mod_name,b_err%message)
     !------------------------------------------------------------------

     !------------------------------------------------------------------
     !2) inverse static condensation

     ! aaki = inv(aak)
     call invmat_chol( aak , aaki )
     ! ddkaaki = ddk*aaki
     ddkaaki = matmul(ddk,aaki)

     ! vvk  = inv(eek - ddk*aaki*bbk)
     call invmat_nop( eek - matmul(ddkaaki,bbk) , vvk )
     ! uuk  = vvk*(ddk*aaki*cck - hhk)
     uuk = matmul( vvk , matmul(ddkaaki,cck)-hhk )
    
     ! qqk  = -aaki*(bbk*uuk + cck)
     qqk = -matmul( aaki , matmul(bbk,uuk)+cck )
     ! wwk  = -aaki*bbk*vvk
     wwk = -matmul(aaki,matmul(bbk,vvk))

     ! elementwise solution lambda
     do isl=1,d+1
       is = grid%e(ie)%is(isl)
       ! is -> isl
       lamk((isl-1)*nk+1 : isl*nk) = lam((is-1)*nk+1 : is*nk)
     enddo

     ! uuuk = uuk*lamk + vvk*f2k
     uuuk = matmul(uuk,lamk) + matmul(vvk,f2k)
     ! qqqk = qqk*lamk + wwk*f2k
     qqqk = matmul(qqk,lamk) + matmul(wwk,f2k)
   
     ! store the computed values back into global arrays
     uuu((ie-1)*pk+1 : ie*pk) = uuuk
     qqq((ie-1)*mk+1 : ie*mk) = qqqk
     !------------------------------------------------------------------

     !------------------------------------------------------------------
     !3) numerical fluxes (including tau)
     do isl=1,d+1

       ! q \cdot n: using the Piola transformation, the flux is
       !            constant
       is = grid%e(ie)%is(isl)
       qns = (base%me%a(isl)/grid%s(is)%a)*matmul(qqqk,base%no(:,:,isl))

       ! u
       ubs = matmul(uuuk,base%pb(:,:,isl))

       ! \hat{u} \equiv \lambda
       ebs = base%e(:,base%stab(grid%e(ie)%ip(isl),:))
       uhbs = matmul( lamk((isl-1)*nk+1 : isl*nk) , ebs )

       ! hat{q} \cdot n = q \cdot n + tau*(u-\hat{u})
       qhn_xig = qns + taus(ie)%tau(:,isl)*(ubs-uhbs)
       qhn(:,isl,ie) = matmul(pp,qhn_xig)

     enddo
     !------------------------------------------------------------------
   enddo elem_loop
   !$omp end do nowait
   !$omp end parallel

   dt = my_second()-t0
   call time_message('Primal and dual reconstructions')

   !--------------------------------------------------------------------
   ! Superconvergent reconstructions
   
   ! 1) u^\star
   t0 = my_second()
   call rec_us(grid,base,lam) ! includes allocate(uuus)
   dt = my_second()-t0
   call time_message('u^\star reconstruction')

   ! 2) u^* (exponential fitting): computed only for suitable
   ! advection fields (this function requires the variable bases
   ! defined in rec_us)
   compute_usef = gcompute_usef
   if(compute_usef) then
     t0 = my_second()
     call rec_usef(grid,base,lam) ! includes allocate(uuusef,xiadv_el)
     dt = my_second()-t0
     call time_message('u^* reconstruction')
   endif

!   ! 3) q^\star_{DG}
!   if(base%r.gt.0) then
!     t0 = my_second()
!
!     call rec_qsdg(base) ! includes allocate(qqqsdg)
!
!     dt = my_second()-t0
!     call time_message('q^\star_{DG} reconstruction')
!   endif

   ! 4) q^\star_{RT}
   t0 = my_second()
   call rec_qsrt(grid,base) ! includes allocate(qqqsrt)
   dt = my_second()-t0
   call time_message('q^\star_{RT} reconstruction')

   ! 5) final summary
   dt = my_second()-t00
   call time_message('All reconstructions')

   mod_ldgh_reconstructions_initialized = .true.
  contains

  subroutine time_message(msg)
   character(len=*), intent(in) :: msg

   character(len=1000) :: message, description

    write(description,'(a,a)') msg,' completed: elapsed time '
    write(message,elapsed_format) trim(description),dt,'s.'
    call info(this_sub_name,this_mod_name,message)

  end subroutine time_message

 end subroutine mod_ldgh_reconstructions_constructor

!-----------------------------------------------------------------------
 
 subroutine mod_ldgh_reconstructions_destructor()
  character(len=*), parameter :: &
    this_sub_name = 'destructor'
   
   !Consistency checks ---------------------------
   if(mod_ldgh_reconstructions_initialized.eqv..false.) then
     call error(this_sub_name,this_mod_name, &
                'This module is not initialized.')
   endif
   !----------------------------------------------

   deallocate( uuu , qqq , qhn )
   deallocate( uuus , qqqsrt )
   if(compute_usef) deallocate(uuusef,xiadv_el)
!   if(allocated(qqqsdg)) deallocate(qqqsdg)

   mod_ldgh_reconstructions_initialized = .false.
 end subroutine mod_ldgh_reconstructions_destructor

!-----------------------------------------------------------------------
 
 !> Reconstruction \f$u^\star\f$: basis and degrees of freedom.
 !!
 !! \f$u^\star\f$ is computed solving, for each element, the problem
 !! \f{equation}{
 !!    \left\{ \begin{array}{rcll}
 !!    \displaystyle \nabla\cdot\left(-\mu\nabla u^\star +
 !!    au^\star\right) + ru^\star&=&f & {\rm in} \,\,K \\[2mm]
 !!     \left(-\mu\nabla u^\star + au^\star\right)\cdot n&=&\hat{q}_n &
 !!    {\rm on}\,\,\partial K
 !!    \end{array}\right.
 !! \f}
 !! If \f$r=0\f$, we introduce the additional condition
 !! \f{equation}{
 !!    \int_Ku^\star\,dx = \int_Ku\,dx
 !! \f}
 !! for \f$k\geq1\f$ and
 !! \f{equation}{
 !!    \int_Ku^\star\,dx = \frac{1}{d+1}\sum_{i=1}^{d+1}\lambda_i
 !! \f}
 !! if \f$k=0\f$.
 !<
 subroutine rec_us(grid,base,lam)
  type(t_grid), target, intent(in) :: grid
  type(t_base), intent(in) :: base
  real(wp), intent(in) :: lam(:)
 
  integer :: ie, l, i, j, isl
  real(wp) :: ubar, uuuk(base%pk), a_grad_sigma_p
  real(wp), allocatable :: xg(:,:), mu(:,:,:), a(:,:), sigma(:), &
    f(:), ssk(:,:), fk(:), bi_mu_bit(:,:), bi_a(:), uuusk(:),    &
    mu_grad(:), e_low(:,:), wgsa(:), qhn_s(:)
  character(len=*), parameter :: &
    this_sub_name = 'rec_us'

   !--------------------------------------------------------------------
   !1) Define the high order basis.
   ! A nontrivial issue arises in the definition of the quadrature
   ! nodes on the boundaries, where the new, high-order reconstruction
   ! has to recover the boundary condition given by the lower order
   ! solution. For two-dimensional problems one could rely on the fact
   ! that Gaussian nodes which integrate 2k functions also integrate
   ! 2k+1 functions, but this handy property is lost in higher
   ! dimension. For this reason, we use here high order nodes and
   ! re-interpolate the numerical flux, i.e. the boundary condition,
   ! on these nodes.
   bases = dg_scal( base%me , base%k+1 )
   ! evaluate the old hybrid basis on the new boundary nodes
   allocate(e_low(base%nk,bases%ms))
   do i=1,base%nk
     e_low(i,:) = ev(base%e_s(i),bases%xigs)
   enddo
   !--------------------------------------------------------------------

   !$omp parallel private(xg,mu,a,sigma,f,bi_mu_bit,bi_a, &
   !$omp                  mu_grad,a_grad_sigma_p,         &
   !$omp                  ssk,fk,uuusk, ie,l,i,j,isl,     &
   !$omp                  wgsa,ubar,uuuk,qhn_s)           &
   !$omp           shared(base,bases,e_low,uuus,grid,lam,uuu,qhn,&
   !$omp                  coeff_diff,coeff_adv,coeff_re,coeff_f) &
   !$omp          default(none)

   !--------------------------------------------------------------------
   !1+1/2) Allocations
   !$omp single
    allocate(uuus(grid%ne*bases%pk)) ! module variable allocation
   !$omp end single
   allocate( xg(grid%m,bases%m) , mu(grid%m,grid%m,bases%m) ,    &
     a(grid%m,bases%m) , sigma(bases%m) , f(bases%m) ,           &
     mu_grad(grid%m) , bi_mu_bit(grid%m,grid%m) , bi_a(grid%m) , &
     ssk(bases%pk,bases%pk) , fk(bases%pk) , uuusk(bases%pk) )
   allocate( wgsa(bases%ms) , qhn_s(bases%ms) )
   !--------------------------------------------------------------------

   !--------------------------------------------------------------------
   !2) Solve the local elliptic problems
   !$omp do schedule(static)
   elem_loop: do ie=1,grid%ne

     !2.1) problem coefficients
     xg = affmap(grid%e(ie),bases%xig)
     mu    = coeff_diff(xg)
     a     = coeff_adv(xg)
     sigma = coeff_re(xg)
     f     = coeff_f(xg)

     !2.2) local matrix
     !2.2.1) internal contributions
     ssk = 0.0_wp
     fk = 0.0_wp
     do l=1,bases%m
       ! bi_mu_bit = b^{-1} * mu * b^{-T}
       bi_mu_bit = matmul( grid%e(ie)%bi ,                       &
                      matmul(mu(:,:,l),transpose(grid%e(ie)%bi)) )
       ! bi_a = b^{-1} * a
       bi_a = matmul( grid%e(ie)%bi , a(:,l) )
       do i=1,bases%pk
         mu_grad = matmul( bi_mu_bit , bases%gradp(:,i,l) )
         a_grad_sigma_p = - dot_product( bi_a , bases%gradp(:,i,l) ) &
                                    + sigma(l) * bases%p(i,l)
         do j=1,bases%pk
           ssk(i,j) = ssk(i,j) + bases%wg(l) * (                      &
                     dot_product( bases%gradp(:,j,l) , mu_grad )      &
                                      + bases%p(j,l) * a_grad_sigma_p )
         enddo
         fk(i) = fk(i) + bases%wg(l) * f(l) * bases%p(i,l)
       enddo
     enddo
     ssk = grid%e(ie)%det_b*ssk
     fk  = grid%e(ie)%det_b*fk

     !2.2.2) boundary contributions
     do isl=1,grid%d+1 ! loop on the sides

       ! Notice that the integrand qhn*phi_i is a scalar function, so
       ! we simply have to rescale the integral according to the side
       ! measure. While rescaling the quadrature weights, we also sort
       ! them according to the permutation table (although this would
       ! not be necessary, due to the symmetry of the weights).
       wgsa = (grid%e(ie)%s(isl)%p%a/bases%me%voldm1) * &
                bases%wgs(bases%stab(grid%e(ie)%ip(isl),:))
       qhn_s = matmul(qhn(:,isl,ie),e_low)

       do l=1,bases%ms
         do i=1,bases%pk
           fk(i) = fk(i) - wgsa(l) * qhn_s(l)*bases%pb(i,l,isl)
         enddo
       enddo
     enddo

     !2.3) force the mean value if sigma is zero
     if(minval(sigma).le.0.0_wp) then
       !2.3.1) local solution
       if(base%k.eq.0) then
         ! for k.eq.0 the dofs coincide with the edges
         ubar = sum(lam(grid%e(ie)%is))/real(grid%d+1,wp)
       else
         uuuk = uuu((ie-1)*base%pk+1 : ie*base%pk)
         ! compute the mean value
         ubar = 0.0_wp
         do l=1,base%m
           do j=1,base%pk
             ubar = ubar + base%wg(l)*base%p(j,l)*uuuk(j)
           enddo
         enddo
         ubar = ubar/base%me%vol ! mean value
       endif
       !2.3.2) assuming that the first test function is constant, we
       !replace the first equation with the condition on the average
       ssk(1,:) = 0.0_wp
       do l=1,bases%m
         do j=1,bases%pk
           ssk(1,j) = ssk(1,j) + bases%wg(l)*bases%p(j,l)
         enddo
       enddo
       ssk(1,:) = ssk(1,:)/base%me%vol
       fk(1) = ubar
     endif

     !2.4) solve the local problem
     call linsys(ssk,fk,uuusk)

     !2.5) store back the computed value
     uuus((ie-1)*bases%pk+1 : ie*bases%pk) = uuusk

   enddo elem_loop
   !$omp end do nowait
   !--------------------------------------------------------------------
 
   deallocate( xg ,               &
     mu , a , sigma , f ,         &
     mu_grad , bi_mu_bit , bi_a , &
     ssk , fk , uuusk , wgsa , qhn_s )
   !$omp end parallel

   deallocate( e_low )

 end subroutine rec_us
 
!-----------------------------------------------------------------------

 !> Compute the reconstruction \f$u^*\f$ using the basis \c bases
 !! defined in \c rec_us.
 !!
 !! \f$u^*\f$ is computed solving, for each element, the problem
 !! \f{equation}{
 !!    \left\{ \begin{array}{rcll}
 !!    \displaystyle \nabla\cdot\left(-\mu e^\xi\nabla \nu\right) +
 !!     re^\xi\nu&=&f & {\rm in} \,\,K \\[2mm]
 !!    - \mu e^\xi\nabla \nu \cdot n&=&\hat{q}_n &
 !!    {\rm on}\,\,\partial K
 !!    \end{array}\right.
 !! \f}
 !! and setting \f$u^* = \nu e^\xi\f$, with \f$a =\mu\nabla\xi\f$.
 !!
 !! \note When \f$r=0\f$ and additional constraint on the average of
 !! \f$\nu\f$ nust be enforced, as in \c rec_us.
 !! \note Due to the fact that \f$u^*\f$ is not a polynomial function,
 !! what is stored in \c uuusef are in fact the coefficients of
 !! \f$\nu\f$.
 !! \note This function should be called only for potential velocity
 !! fields, so that \f$\xi\f$ is well defined. The coresponding value
 !! must be provided by \c coeff_xiadv.
 !! \note The function \f$\xi\f$ is defined up to a constant, which
 !! can be defined on an element basis. To improve the numerical
 !! stability of the method, we subtract a reference value for each
 !! element, and since such a value must be knwn at the time of
 !! recovering \f$u^*\f$ from \f$\nu\f$, we store it in the vector \c
 !! xiadv_el.
 !! \warning It is assumed here that \c bases is an orthogonal basis.
 !<
 subroutine rec_usef(grid,base,lam)
  type(t_grid), target, intent(in) :: grid
  type(t_base), intent(in) :: base
  real(wp), intent(in) :: lam(:)
 
  integer :: ie, l, i, j, isl
  real(wp) :: ubar, uuuk(base%pk),                   &
    xm(grid%m,grid%d+1), xi_side(grid%d+1), mean_p1, &
    xi_e(1) ! a vector is used to simplify the function calls
  real(wp) :: sigma_p
  real(wp), allocatable :: xg(:,:), mu(:,:,:), xi(:), sigma(:),  &
    f(:), ssk(:,:), fk(:), bi_mu_bit(:,:), uuusk(:), w_expxi(:), &
    mu_grad(:), e_low(:,:), wgsa(:), qhn_s(:)
  character(len=*), parameter :: &
    this_sub_name = 'rec_usef'

   !--------------------------------------------------------------------
   !1) The high order basis is the same used for the other
   !   postprocessing: the module variable bases. We assume here that
   !   this basis has already been defined, and we simply precompute
   !   the mean value of the first basis function.
   mean_p1 = sum(bases%wg*bases%p(1,:))/bases%me%vol
   ! Concerning the boundary quadrature, we have the same problem
   ! discussed in rec_us, so we proceed in the same way.
   allocate(e_low(base%nk,bases%ms))
   do i=1,base%nk
     e_low(i,:) = ev(base%e_s(i),bases%xigs)
   enddo
   !--------------------------------------------------------------------

   !$omp parallel private(xg,mu,xi,sigma,f,bi_mu_bit,           &
   !$omp                  mu_grad,sigma_p, qhn_s,               &
   !$omp                  ssk,fk,uuusk, ie,l,i,j,isl,           &
   !$omp                  xi_e,w_expxi,wgsa,xm,xi_side,ubar,uuuk) &
   !$omp           shared(base,bases,grid,uuusef,xiadv_el,mean_p1, &
   !$omp                  coeff_diff,coeff_xiadv,coeff_re,coeff_f, &
   !$omp                  e_low,qhn,lam,uuu) &
   !$omp          default(none)

   !--------------------------------------------------------------------
   !1+1/2) Allocations
   !$omp single
    allocate(uuusef(grid%ne*bases%pk),xiadv_el(grid%ne)) ! module var
   !$omp end single
   allocate( xg(grid%m,bases%m) , mu(grid%m,grid%m,bases%m) ,      &
     xi(bases%m) , sigma(bases%m) , f(bases%m) , mu_grad(grid%m) , &
     bi_mu_bit(grid%m,grid%m) , w_expxi(bases%m) ,                 &
     ssk(bases%pk,bases%pk) , fk(bases%pk) , uuusk(bases%pk) )
   allocate( wgsa(bases%ms) , qhn_s(bases%ms) )
   !--------------------------------------------------------------------

   !--------------------------------------------------------------------
   !2) Solve the local elliptic problems
   !$omp do schedule(static)
   elem_loop: do ie=1,grid%ne

     !2.1) problem coefficients
     xg = affmap(grid%e(ie),bases%xig)
     mu    = coeff_diff(xg)
     xi_e  = coeff_xiadv(reshape( grid%e(ie)%xb , (/grid%m,1/) ))
     xi    = coeff_xiadv(xg)-xi_e(1)
     sigma = coeff_re(xg)
     f     = coeff_f(xg)
     ! modified diffusion and reaction coefficients
     w_expxi = bases%wg*exp(xi)
     do l=1,bases%m
       mu(:,:,l) = mu(:,:,l)*w_expxi(l)
     enddo
     sigma = sigma*w_expxi

     !2.2) local matrix
     !2.2.1) internal contributions
     ssk = 0.0_wp
     fk = 0.0_wp
     do l=1,bases%m
       ! bi_mu_bit = bk^{-1} * mu * bk^{-T}
       bi_mu_bit = matmul( grid%e(ie)%bi ,                       &
                      matmul(mu(:,:,l),transpose(grid%e(ie)%bi)) )
       do i=1,bases%pk
         mu_grad = matmul( bi_mu_bit , bases%gradp(:,i,l) )
         sigma_p = sigma(l) * bases%p(i,l)
         do j=1,bases%pk
           ssk(i,j) = ssk(i,j) + (                               &
                     dot_product( bases%gradp(:,j,l) , mu_grad ) &
                                      + bases%p(j,l) * sigma_p )
         enddo
         fk(i) = fk(i) + bases%wg(l) * f(l) * bases%p(i,l)
       enddo
     enddo
     ssk = grid%e(ie)%det_b*ssk
     fk  = grid%e(ie)%det_b*fk

     !2.2.2) boundary contributions
     do isl=1,grid%d+1
       ! scaled edge weigths
       wgsa = (grid%e(ie)%s(isl)%p%a/bases%me%voldm1) * &
                bases%wgs(bases%stab(grid%e(ie)%ip(isl),:))
       qhn_s = matmul(qhn(:,isl,ie),e_low)
       do l=1,bases%ms
         do i=1,bases%pk
           fk(i) = fk(i) - wgsa(l) * qhn_s(l)*bases%pb(i,l,isl)
         enddo
       enddo
     enddo

     !2.3) force the mean value if sigma is zero
     if(minval(sigma).le.0.0_wp) then
       !2.3.1) local solution
       if(base%k.eq.0) then
         ! for r.eq.0 the dofs coincide with the edges
         do i=1,grid%d+1
           xm(:,i) = grid%e(ie)%s(i)%p%xb
         enddo
         xi_side = coeff_xiadv(xm)-xi_e(1) ! xi at the midpoints
         ubar = sum(exp(-xi_side)*lam(grid%e(ie)%is))/real(grid%d+1,wp)
       else
         uuuk = uuu((ie-1)*base%pk+1 : ie*base%pk)
         ! compute the mean value
         ubar = 0.0_wp
         do l=1,base%m
           do j=1,base%pk
             ubar = ubar + base%wg(l)*exp(-xi(l))*base%p(j,l)*uuuk(j)
           enddo
         enddo
         ubar = ubar/base%me%vol
       endif
       !2.3.2) assuming that the first test function is constant, we
       !replace the first equation with the condition on the average,
       !and since the basis is orthogonal the only contribution to the
       !average comes from the first basis function.
       ssk(1,1) = mean_p1
       ssk(1,2:) = 0.0_wp
       fk(1) = ubar

       !2.4) solve the local problem
       uuusk(1) = fk(1)/ssk(1,1)
       call linsys_chol( ssk(2:,2:) , fk(2:)-ssk(2:,1)*uuusk(1), &
                         uuusk(2:) )
     else
       !2.4) solve the local problem
       call linsys_chol(ssk,fk,uuusk)
     endif

     !2.5) store back the computed value
     uuusef((ie-1)*bases%pk+1 : ie*bases%pk) = uuusk
     xiadv_el(ie) = xi_e(1)

   enddo elem_loop
   !$omp end do nowait
   !--------------------------------------------------------------------

   deallocate( xg , mu , xi , &
       sigma , f , ssk ,      &
       bi_mu_bit , mu_grad ,  &
       fk , uuusk , w_expxi , &
       wgsa , qhn_s )
   !$omp end parallel

   deallocate( e_low )

 end subroutine rec_usef

!-----------------------------------------------------------------------

 !> Reconstruction \f$q^\star_{\mathbb{RT}}\f$: basis and degrees of
 !! freedom.
 !!
 !! \f$q^\star_{\mathbb{RT}}\f$ is computed considering, for each
 !! element, the projector
 !! \f{equation}{
 !!    \left\{ \begin{array}{rcll}
 !!    \displaystyle
 !!  \int_e q^\star_{\mathbb{RT}}\cdot n\,\eta\,d\sigma & = &
 !!    \displaystyle
 !!  \int_e \hat{q}_n \eta \,d\sigma &
 !!   \forall \eta\in\mathbb{P}_k(e)\\[3mm]
 !!    \displaystyle
 !!  \int_K q^\star_{\mathbb{RT}} \cdot\phi\,dx & = &
 !!    \displaystyle
 !!  \int_K q\cdot \phi \,dx &
 !!   \forall \phi\in\left(\mathbb{P}_{k-1}\right)^d(K).
 !!    \end{array}\right..
 !! \f}
 !! This projector is closely related to the canonical degrees of
 !! freedom of the \f$\mathbb{RT}_k\f$ space discussed in \c
 !! re_vect_canonical_dofs, and we follow here the construction
 !! described therein.
 !<
 subroutine rec_qsrt(grid,base)
  type(t_grid), target, intent(in) :: grid
  type(t_base), intent(in) :: base ! the original basis
 
  integer :: ie, i, isl, shift_is, ii
  real(wp) :: wgsa(base%ms), qk(grid%d,base%m), qhn_s(base%ms)
  real(wp), allocatable :: qqqsk(:)
  type(t_base) :: base_test

   !$omp parallel private(qqqsk,ie,isl,i,shift_is,ii,wgsa,qk, &
   !$omp                  base_test,qhn_s) &
   !$omp           shared(base,base_qsrt,grid,qqqsrt,qhn,qqq) &
   !$omp          default(none)

   !--------------------------------------------------------------------
   !1) Define the RT vector basis of order k. Concerning the
   ! quadrature formulas: both on the boundary and in the interior of
   ! the elements we need to integrate 2*k. This is the exactness
   ! required in the original system, so that the same quadrature
   ! nodes can be used.
   !$omp single
    base_qsrt = rt_vect( base%me , base%k ,                           &
      xig =base%xig  , wg =base%wg  , deg =base%deg,                  &
      xigs=base%xigs , wgs=base%wgs , degs=base%degs , stab=base%stab )
   !$omp end single

   ! The test basis for the interior is also necessary (for the
   ! boundary terms, base can be used)
   if(base%k.gt.0) &
     base_test = dg_vect( base%me , base%k-1 ,                       &
     xig =base%xig  , wg =base%wg  , deg =base%deg,                  &
     xigs=base%xigs , wgs=base%wgs , degs=base%degs , stab=base%stab )

   ! Compute the canonical RT dofs
   !$omp single
    base_qsrt = rt_vect_canonical_dofs(base_qsrt,base%e_s,base_test%o_s)
   !$omp end single
   !--------------------------------------------------------------------

   !--------------------------------------------------------------------
   !1+1/2) Allocations
   !$omp single
    allocate(qqqsrt(grid%ne*base_qsrt%mk)) ! module variable all
   !$omp end single
   allocate( qqqsk(base_qsrt%mk) )
   !--------------------------------------------------------------------

   !--------------------------------------------------------------------
   !2) Compute the degrees of freedom
   !$omp do schedule(static)
   elem_loop: do ie=1,grid%ne

     ! 2.1) boundary functionals
     do isl=1,grid%me%d+1
       shift_is = (isl-1)*base%nk
       ! Notice: the boundary test functions are not associated with
       ! the sides of the mesh, but with the sides of the element,
       ! since the degrees of freedom are transformed with the
       ! element. Hence, the boundary nodes don't have to be reordered
       ! according with the intrinsic orientation of the mesh, but
       ! must follow the orientation given by the reference element.
       ! We thus simply incorporate the area factor in the weights,
       ! without any reordering.
       wgsa = (grid%e(ie)%s(isl)%p%a/base%me%voldm1) * base%wgs
       qhn_s = matmul(qhn(:,isl,ie),base%e)
       do i=1,base%nk
         ii = shift_is + i
         qqqsk(ii) = sum(wgsa*qhn_s*base%e(i,:))
       enddo
     enddo

     !2.2) element functionals: this can be easily evaluated on the
     !reference element
     if(base%k.gt.0) then
       ! compute the original vector (and include the weights)
       do i=1,base%me%d
         qk(i,:) = base%wg * &
           matmul( qqq((ie-1)*base%mk+1 : ie*base%mk) , base%o(i,:,:) )
       enddo
       ! compute the functional
       shift_is = (grid%me%d+1)*base%nk
       do i=1,base_test%mk
         ii = shift_is + i
         qqqsk(ii) = sum( qk*base_test%o(:,i,:) )
       enddo
     endif

     !2.2) store back the computed value
     qqqsrt((ie-1)*base_qsrt%mk+1 : ie*base_qsrt%mk) = qqqsk

   enddo elem_loop
   !$omp end do nowait
   !--------------------------------------------------------------------

   deallocate( qqqsk )
   !$omp end parallel

 end subroutine rec_qsrt

!-----------------------------------------------------------------------

! subroutine rec_qsdg(base)
! ! In this subroutine the reconstructed solution q^\star (in the DG
! ! space) is computed, together with the associated basis. See
! ! rec_qsrt for details.
!  type(t_base), intent(in) :: base ! the original basis
! 
!  integer :: ie, l, i, j, isl, shift_is, ii
!  real(wp) :: wgel(base%me), bktbk(ndim,ndim), qk(ndim,base%m), &
!              b_test(ndim)
!  real(wp), allocatable :: ssk(:,:), fk(:), qqqsk(:)
!  type(t_base) :: base_test, base_test_rot
!
!   !--------------------------------------------------------------------
!   !1) Define the DG vector basis of order r. Concerning the
!   ! quadrature formulas: both on the boundary and in the interior of
!   ! the elements we need to integrate 2*r. This is the exactness
!   ! required in the original system, so that the same quadrature
!   ! nodes can be used.
!   base_qsdg = dg_vect( base%r ,                                 &
!               xig =base%xig  , wg =base%wg  , deg2d=base%deg2d, &
!               xige=base%xige , wge=base%wge , deg1d=base%deg1d )
!   ! The test basis for the interior is also necessary (for the
!   ! boundary terms, base can be used)
!   if(base%r.gt.0) &
!     base_test = dg_scal( base%r-1 ,                               &
!                 xig =base%xig  , wg =base%wg  , deg2d=base%deg2d, &
!                 xige=base%xige , wge=base%wge , deg1d=base%deg1d )
!   if(base%r.gt.1) &
!     base_test_rot = solen_vect( base%r ,                          &
!                 xig =base%xig  , wg =base%wg  , deg2d=base%deg2d, &
!                 xige=base%xige , wge=base%wge , deg1d=base%deg1d )
!   ! Adjust the basis to the chosen test functions
!   base_qsdg = adapt_dg_vect(base_qsdg,base%e_s,                &
!                             base_test%gradp_s,base_test_rot%o_s)
!   !--------------------------------------------------------------------
!
!   !$omp parallel private(ssk,fk,qqqsk,             &
!   !$omp                  ie,isl,i,shift_is,l,ii,j, &
!   !$omp                  wgel,bktbk,qk,b_test)     &
!   !$omp           shared(base,base_qsdg,base_test,base_test_rot, &
!   !$omp                  grid,qqqsdg,qhn,qqq,ndim)     &
!   !$omp          default(none)
!
!   !--------------------------------------------------------------------
!   !1+1/2) Allocations
!   !$omp single
!    allocate(qqqsdg(grid%ne*base_qsdg%mk)) ! module variable all
!   !$omp end single
!   allocate( qqqsk(base_qsdg%mk) )
!   if(base%r.gt.1) &
!     allocate( ssk(base_test_rot%mk,base_qsdg%mk) , &
!                fk(base_test_rot%mk) )
!   !--------------------------------------------------------------------
!
!   !--------------------------------------------------------------------
!   !2) Solve the local elliptic problems
!   !$omp do schedule(static)
!   elem_loop: do ie=1,grid%ne
!
!     qqqsk = 0.0_wp
!     !2.1.1) boundary contributions
!     do isl=1,3
!       shift_is = (isl-1)*base%nk
!       wgel = base%wge*grid%e(ie)%s(isl)%p%l/2.0_wp
!       do l=1,base%me
!         do i=1,base%nk
!           ii = shift_is + i
!           qqqsk(ii) = qqqsk(ii) + wgel(l) * qhn(ie,l,isl)*base%e(i,l)
!         enddo
!       enddo
!     enddo
!
!     !2.1.2) internal contributions
!     if(base%r.gt.0) then
!       ! original numerical solution
!       do j=1,ndim
!         qk(j,:) = matmul( qqq((ie-1)*base%mk+1 : ie*base%mk) , &
!                           base%o(j,:,:) )
!       enddo
!       shift_is = 3*base%nk
!       do l=1,base%m
!         do i=2,base_test%pk ! skip the first function: constant
!           ii = shift_is + i - 1
!           qqqsk(ii) = qqqsk(ii) + base%wg(l) * &
!             dot_product(qk(:,l),base_test%gradp(:,i,l))
!         enddo
!       enddo
!
!       if(base%r.gt.1) then
!       ! The rotational degrees of freedom must be computed in
!       ! physical space
!         ssk = 0.0_wp
!         fk = 0.0_wp
!         bktbk = matmul(transpose(grid%e(ie)%bk),grid%e(ie)%bk)
!         ! in these equations a factor 1/(2|K|) is simplified on both
!         ! sides of the equation.
!         do l=1,base%m
!           do i=1,base_test_rot%mk
!             b_test = matmul(bktbk,base_test_rot%o(:,i,l))
!             do j=1,base_qsdg%mk
!               ssk(i,j) = ssk(i,j) + base%wg(l) *     &
!                 dot_product(base_qsdg%o(:,j,l),b_test)
!             enddo
!             fk(i) = fk(i) + base%wg(l) * dot_product(qk(:,l),b_test)
!           enddo
!         enddo
!
!         ! solve the local system
!         shift_is = 3*base%nk + base_test%pk - 1
!         fk = fk - matmul( ssk(:,1:shift_is) , qqqsk(1:shift_is) )
!         call linsys( ssk(:,shift_is+1:) , fk , qqqsk(shift_is+1:) )
!       endif
!
!     endif
!
!     !2.2) store back the computed value
!     qqqsdg((ie-1)*base_qsdg%mk+1 : ie*base_qsdg%mk) = qqqsk
!
!   enddo elem_loop
!   !$omp end do nowait
!   !--------------------------------------------------------------------
!
!   deallocate( qqqsk )
!   if(base%r.gt.1) deallocate( ssk , fk )
!   !$omp end parallel
!
! end subroutine rec_qsdg

!-----------------------------------------------------------------------

end module mod_ldgh_reconstructions

